##########################################################
# Project:    Talking to the Populist Radical Right
# Task:       The script counts the mentions of parties in 
#             speeches in the German parliament and
#             annotates the data set
# Author:     Jan Schwalbach (21/07/2022)
##########################################################

# Loading packages and data

library(quanteda)
library(ggrepel)
library(grid)
library(zoo)
library(stringr)
library(pscl)
library(aod)
library(ggplot2)

# The .Rdata file is expected to provide the data frame `corpusD` that the
# rest of the script works on.
load(file = "Corpus_Germany.Rdata")

# Keeping only the speeches that enter the analysis: a known speaker, a known
# party other than "fraktionslos", and a date on or after 2017-09-24.
#
# All conditions are combined into one mask and rows whose mask is NA are
# dropped via which(): indexing a data frame with an NA logical does not
# remove the row, it inserts a row consisting entirely of NAs.
keep <- !is.na(corpusD$speaker) &
  !is.na(corpusD$party) &
  corpusD$party != "fraktionslos" &
  corpusD$date >= "2017-09-24"
corpusD <- corpusD[which(keep), ]

# Annotating speeches for topics
#
# A speech is coded 1 for a topic when its presence flag equals "TRUE" or when
# its topic count is larger than 2; speeches with a topic count of exactly 0
# are always coded 0, whatever the flag says. Rows for which a condition is NA
# are left untouched by that step.

corpusD$immigration <- 0
corpusD$immigration[which(corpusD$migpres1 == "TRUE")] <- 1
corpusD$immigration[which(corpusD$migrationcount > 2)] <- 1
corpusD$immigration[which(corpusD$migrationcount == 0)] <- 0

corpusD$education <- 0
corpusD$education[which(corpusD$educationpres1 == "TRUE")] <- 1
corpusD$education[which(corpusD$educationcount > 2)] <- 1
corpusD$education[which(corpusD$educationcount == 0)] <- 0

# Loading the dictionaries and counting for each party

# Counts, for every element of `texts`, how often a term from one party's
# dictionary occurs.
#
# texts: character vector of speech texts.
# party: dictionary name; the terms are read from the column `party` of the
#        semicolon-separated, Windows-1252 encoded file "<party>.csv".
# Returns the per-text match counts produced by str_count().
#
# The terms are trimmed, empty cells are dropped and the remaining terms are
# joined into a single regular expression of the form " term1 | term2 ", so a
# term only matches when it has a space on both sides.
# NOTE(review): because of the surrounding spaces, a term directly followed by
# punctuation or located at the very start/end of a text is not counted, and
# two adjacent mentions share one space, so only the first is counted.
# Confirm that this is intended.
count_party_mentions <- function(texts, party) {
  dictionary <- read.csv(
    file = paste0(party, ".csv"),
    fileEncoding = "WINDOWS-1252",
    stringsAsFactors = FALSE,
    header = TRUE,
    sep = ";"
  )
  # exact = FALSE keeps the partial column-name matching that `$` performs.
  terms <- dictionary[[party, exact = FALSE]]
  if (is.null(terms)) {
    stop(
      "Dictionary file '", party, ".csv' has no column '", party, "'",
      call. = FALSE
    )
  }
  terms <- str_trim(terms, side = "both")
  pattern <- paste0(" ", paste(terms[terms != ""], collapse = " | "), " ")
  str_count(texts, pattern)
}

# For every party: number of dictionary matches per speech (`...count`) and
# whether the speech contains at least one match (`...pres`).
corpusD$afdcount <- count_party_mentions(corpusD$text, "AfD")
corpusD$afdpres <- corpusD$afdcount > 0

corpusD$SPDcount <- count_party_mentions(corpusD$text, "SPD")
corpusD$SPDpres <- corpusD$SPDcount > 0

corpusD$CDUcount <- count_party_mentions(corpusD$text, "CDU")
corpusD$CDUpres <- corpusD$CDUcount > 0

corpusD$Linkecount <- count_party_mentions(corpusD$text, "Linke")
corpusD$Linkepres <- corpusD$Linkecount > 0

corpusD$FDPcount <- count_party_mentions(corpusD$text, "FDP")
corpusD$FDPpres <- corpusD$FDPcount > 0

corpusD$Grünecount <- count_party_mentions(corpusD$text, "Grüne")
corpusD$Grünepres <- corpusD$Grünecount > 0

# Year of each speech: the date with its "-MM-DD" part stripped, converted to
# a number. `partyyear` is the label "<party> <year>".
corpusD$year <- as.numeric(
  gsub("-[0-9][0-9]-[0-9][0-9]", "", corpusD$date, fixed = FALSE)
)
corpusD$partyyear <- paste(corpusD$party, corpusD$year, sep = " ")


# Summing the mentions of every party over all speeches, grouped by the party
# of the speaker. `afdcount` keeps the grouping column `Category` next to its
# sum; the other tables keep only their sum column so that they can be bound
# together column-wise afterwards.
speaker_party <- list(Category = corpusD$party)

afdcount <- setNames(
  aggregate(corpusD$afdcount, by = speaker_party, FUN = sum),
  c("Category", "AfD")
)
SPDcount <- setNames(
  aggregate(corpusD$SPDcount, by = speaker_party, FUN = sum)[2],
  "SPD"
)
CDUcount <- setNames(
  aggregate(corpusD$CDUcount, by = speaker_party, FUN = sum)[2],
  "CDU"
)
Linkecount <- setNames(
  aggregate(corpusD$Linkecount, by = speaker_party, FUN = sum)[2],
  "Linke"
)
FDPcount <- setNames(
  aggregate(corpusD$FDPcount, by = speaker_party, FUN = sum)[2],
  "FDP"
)
Grünecount <- setNames(
  aggregate(corpusD$Grünecount, by = speaker_party, FUN = sum)[2],
  "Grüne"
)

# Combining all party counts
#
# One row per speaker party, one column per mentioned party. `party` is the
# grouping label with any trailing " <4-digit year>..." suffix removed.

partycounts <- cbind(
  afdcount, SPDcount, CDUcount, Linkecount, FDPcount, Grünecount
)
partycounts$party <- gsub(
  " [0-9][0-9][0-9][0-9].*", "", partycounts$Category,
  fixed = FALSE
)
# Copy of the table taken before the self-mentions are zeroed out below.
partycounts1 <- partycounts

# A party mentioning itself is not counted: set each party's own column to 0
# in its own row. Names are the speaker-party labels, values the count columns.
own_column <- c(
  "AfD" = "AfD",
  "SPD" = "SPD",
  "CDU/CSU" = "CDU",
  "FDP" = "FDP",
  "BÜNDNIS 90/DIE GRÜNEN" = "Grüne",
  "DIE LINKE" = "Linke"
)
for (speaker in names(own_column)) {
  column <- own_column[[speaker]]
  partycounts[[column]][partycounts$party == speaker] <- 0
}

# Share of AfD mentions among all party mentions made by each speaker party.
mention_columns <- c("AfD", "SPD", "CDU", "FDP", "Grüne", "Linke")
partycounts$AfD_all <- partycounts$AfD /
  Reduce(`+`, partycounts[mention_columns])

# Counting the shares for all/left/right parties
#
# `countsall` holds every speaker party except the AfD; `countsleft` further
# excludes CDU/CSU and FDP, `countsright` further excludes SPD,
# BÜNDNIS 90/DIE GRÜNEN and DIE LINKE.

countsall <- partycounts[partycounts$party != "AfD", ]

is_left <- countsall$party != "CDU/CSU" & countsall$party != "FDP"
countsleft <- countsall[is_left, ]

is_right <- countsall$party != "SPD" &
  countsall$party != "BÜNDNIS 90/DIE GRÜNEN" &
  countsall$party != "DIE LINKE"
countsright <- countsall[is_right, ]

# Average AfD share of mentions within each group of parties.
mean(countsleft$AfD_all)
mean(countsright$AfD_all)
mean(countsall$AfD_all)

# Preparing and annotating the data set for the logit regression
#
# Speeches held by the AfD itself are removed; the remaining speeches receive
# 0/1 indicators for the speaker's party family.

corpusD <- corpusD[corpusD$party != "AfD", ]

# Left parties: BÜNDNIS 90/DIE GRÜNEN, SPD and DIE LINKE

left_parties <- c("BÜNDNIS 90/DIE GRÜNEN", "SPD", "DIE LINKE")
corpusD$left <- as.numeric(corpusD$party %in% left_parties)
table(corpusD$left)

# Mainstream parties: SPD and CDU/CSU

mainstream_parties <- c("SPD", "CDU/CSU")
corpusD$mainstream <- as.numeric(corpusD$party %in% mainstream_parties)

# Dependent variable: 1 when the speech contains at least one AfD dictionary
# match, 0 otherwise.
corpusD$RWdummy <- as.numeric(corpusD$afdcount >= 1)

# Quick checks of the speech types and the covered period.
table(corpusD$type)
range(corpusD$date)

# Term
# Constant columns identifying this data set.

corpusD$term <- 1
corpusD$country <- "Germany"

# Government Type/party and right-wing populist size
# NOTE(review): 13.3 is a hard-coded constant whose source is not visible
# here -- confirm what it measures before reusing it.

corpusD$minority <- 0
corpusD$RRP_size <- 13.3
corpusD$support <- 0

# Speeches whose type is "0" are dropped.
# NOTE(review): rows with an NA type are not dropped by this comparison; they
# come back as all-NA rows. Confirm that `type` contains no NAs.
corpusD <- corpusD[corpusD$type != "0",]

# SPD and CDU/CSU speeches are coded as government, all others as opposition.
corpusD$Government <- "Opposition"
corpusD$Government[corpusD$party == "SPD"] <- "Government"
corpusD$Government[corpusD$party == "CDU/CSU"] <- "Government"

# Tabulated only after the coding above, so that the check reports the values
# that end up in the saved data set.
table(corpusD$Government)

# Keeping only the variables used in the regression and writing the result to
# disk as the object `Germany`.
model_vars <- c(
  "RWdummy", "immigration", "left", "education", "mainstream", "type",
  "term", "minority", "RRP_size", "support", "Government", "country"
)
Germany <- corpusD[, model_vars, drop = FALSE]
save(Germany, file = "Germany_party_mentions.Rdata")